% Replication file for Monte Carlo results in 
% Breusch, Ward, Nguyen, and Kompas 
% "On the Fixed Effects Vector Decomposition" PA 2011
%
% This code was written in the Octave programming language.
% http://www.gnu.org/software/octave/

R = 5000; % number of replications for each experiment

N = 30;
T = 20;
NT = N*T;
const = ones(NT,1);

% parameter range over which we loop
k_rho = [.15 .3 .45 .6];
j_rho = 0:.1:1;

b = [.5; 2; -1.5];    % 'x' coefficients
g = [-2.5; 1.8; 3; 1];% 'z' coefficients

K = length(b)+length(g);

nk = length(k_rho);
nr = length(j_rho);
                    % allocate some storage
mse = cell(nk,1);
mse_ = zeros(nr,4);
est = zeros(4,R);
rhos = cell(nk,1);

randn("seed",1235321);    

% we condition on the observable sample moments
% fixed orthonormal design matrix for time-invariant and group-means  
rxz = randn(N,6);
rxz = rxz - repmat(mean(rxz),N,1);
rxz = rxz*chol(inv(cov(rxz)))';

xd = randn(NT,3);       % fixed design matrix for group-deviations
r = 1:T;                
for n=1:N,
  xd(r,:) = xd(r,:) - repmat(mean(xd(r,:)),T,1); % deviations are mean zero
  r = r+T;
end;
xd = xd*chol(inv(cov(xd)))'; % make xd orthonormal

for k=1:nk, 
  tt = 0;
  nj = nr;
  for j=1:nr,

    rho_uz = j_rho(j);  % covariance of u and z3
    rho_xz = k_rho(k);  % covariance of x and z3

    C = eye(7);
    C(7,6) = rho_uz; C(6,7) = C(7,6);  % uz
    C(1,6) = rho_xz; C(6,1) = C(1,6);  % xz

    sd3 = chol(C(1:6,1:6))(end);   % stdev in z3 of part common to z3 and u
    sdu = rho_uz/sd3;              % stdev in u  of part common to z3 and u
    C(7,7) = sdu^2 + 1;            

    if (any(eig(C)<=0)), nj=j-1; break; end  % invalid covariance spec

    cholC = chol(C);

    randn("seed",12321);    
    for i=1:R,

      if (mod(i,2) == 1),
	randu = randn(N,1);
	rande = randn(NT,1); 
      else                     % antithetic acceleration step
	randu = -randu;
	rande = -rande;
      end

      rnd = [rxz, randu]*cholC;   % note that the sample moments of x and z are set exactly
      X = rnd(:,1:3);             % to conform to the DGP of PT 2007 as closely as possible.
      z = rnd(:,4:6);             
      u = rnd(:,7);               

		      		% expand out time-invariants by T
      u  = repmat(u',T,1); u = u(:);
      z1 = repmat(z(:,1)',T,1); z1 = z1(:);
      z2 = repmat(z(:,2)',T,1); z2 = z2(:);
      z3 = repmat(z(:,3)',T,1); z3 = z3(:);
      x1m = repmat(X(:,1)',T,1); x1m = x1m(:);
      x2m = repmat(X(:,2)',T,1); x2m = x2m(:);
      x3m = repmat(X(:,3)',T,1); x3m = x3m(:);

%      chopped the next line, because fevd = fe for time variant, no need to complicate matters with endogenous x
%      x3m = u;                  % PT assumption (direct to avoid singular cholesky)
      
                                % add on the time-variation 
      x1 = x1m + xd(:,1);       
      x2 = x2m + xd(:,2);
      x3 = x3m + xd(:,3);       
      
      x = [x1, x2, x3];
      z = [z1, z2, z3, const];
      zx = [z, x];
                                % up to this point, only u varies by replication and only z3 by experiment
                                % could hoist lots out of the loop for speed, but it's cleaner to leave it here

      y = x*b + z*g + u + rande;% the DGP
      
      % Having confirmed that the following 3-step FEVD procedure
      % is numerically identical to the IV approach, we comment it out for speed
      % and just use the IV equivalent ....
      %
      % Literal 3-step fevd procedure
      % betafe = xd\y;            % step 1 fixed effects
      % err = y - x*betafe;
      % averr = repmat(mean(reshape(err,T,N)),T,1)(:);
      % gvd = z\averr;            % step 2 regress group-mean error on z and take residuals
      % h = averr - z*gvd;  
      % zxh = [zx h];
      % ptres = zxh\y;            % step 3 regress y on x z h (have confirmed that these are identical to betavd below)
      
 			        % FEVD instrument set
      iv = [z, xd];
      xvd = iv*(iv\zx);
      betavd = xvd\y;
      bvd = betavd(3);
     		  	        % HT estimates, excludes z3 and x3
      iv = [xd, x1, x2, z1, z2, const];
      xht = iv*(iv\zx);
      betaht = xht\y;
      bht = betaht(3);

     		  	        % 'Efficient' estimates, excludes x3 (inconsistent b/c z3 is an instrument)
      iv = [xd, x1, x2, z];
      xef = iv*(iv\zx);
      betaef = xef\y;
      bef = betaef(3);

      % handle ht variance      % here we estimate the on- and off-diagonal elements of the variance matrix
      err  = y - zx*betaht;     % this assumes that e and u are iid. 
      ee = reshape(err,T,N);    % Robust variations are straightforward to implement, using sandwich estimators.
      mn = mean(ee);
      cv = ee - repmat(mn,T,1);
      d1 = sum(sum(cv.*cv))/(N*(T-1)-K); 
      d2 = sum(mn.*mn)/(N-K) - d1/T;     
      omegaht = eye(T)*d1 + repmat(d2,T,T);
      cv1 = cv;
      mn1 = mn;

      % handle ef variance
      err  = y - zx*betaef;
      ee = reshape(err,T,N);
      mn = mean(ee);
      cv = ee - repmat(mn,T,1);
      d1 = sum(sum(cv.*cv))/(N*(T-1)-K); 
      d2 = sum(mn.*mn)/(N-K) - d1/T;
      omegaef = eye(T)*d1 + repmat(d2,T,T);
      cv2 = cv;
      mn2 = mn;

      % handle ht-ef covariance
      d1 = sum(sum(cv1.*cv2))/(N*(T-1)-K); 
      d2 = sum(mn1.*mn2)/(N-K) - d1/T;
      omegaxx = eye(T)*d1 + repmat(d2,T,T);

      diff = bef - bht;
      xht2 = xht'*xht;
      xef2 = xef'*xef;

      % IV variance-covariance formulae (add bias^2 estimate for ef to make it mse).
      % this is just a textbook formula
      s11 = xht2 \ (xht' * kron(speye(N),omegaht) * xht) / xht2;
      s22 = xef2 \ (xef' * kron(speye(N),omegaef) * xef) / xef2 + diff*diff'; 
      s12 = xht2 \ (xht' * kron(speye(N),omegaxx) * xef) / xef2;
      w = (s22(3,3) - s12(3,3)) / (s11(3,3) + s22(3,3) - 2*s12(3,3));
      w = min(max(w,0),1);
      bmix = w*bht + (1-w)*bef;  % weighted average of ef and ht


      % durbin-wu-hausman pre-test for this DGP
      iv = [xd, x1, x2, z1, z2, const]; % all assumed valid instruments
      zresid = z3 - iv*(iv\z3);         % potentially endogenous part of z
      iv = [iv, zresid];
      zrzx = [zx, zresid];              % add zresid to covariates
      xiv = iv*(iv\zrzx);               % test is whether zresid coef != 0
      beta = xiv\y;                     % if reject null (coef=0) must instrument
      err  = y - zrzx*beta;             % otherwise can use z3 as instrument (i.e. FEVD)
      ee = reshape(err,T,N);
      mn = mean(ee);
      cv = ee - repmat(mn,T,1);
      d1 = sum(sum(cv.*cv))/(N*(T-1)-K); 
      d2 = sum(mn.*mn)/(N-K) - d1/T;
      omega = eye(T)*d1 + repmat(d2,T,T);
      xx = xiv'*xiv;
      cv = xx \ (xiv'*kron(speye(N),omega)*xiv) / xx; 
      t = abs(beta(end)/sqrt(cv(end,end)));

      t95 = t > 1.96;                   % ~5% rejection rate under null has been confirmed
      b95 = bef + t95*(bht-bef); 

      est(:,i) = [bvd,bht,bmix,b95];

    end % end monte carlo replications

    % store inner loop results
    err = (est-g(3))';
    mse_(j,:) = diag((err'*err)/(R-1))';

  end % end inner loop (degree of endogeneity)

  % store the valid range of rho for this experiment
  rhos{k} = j_rho(1:nj); 
  % store the mean squared error matrix for this experiment
  mse{k} = mse_(1:nj,:);

end % end outer loop (instrument strength)

  % The MSE results are stored in the cell array 'mse'
  % For example mse{2} is a matrix that contains results for 
  % the loop where k=2 above. Each column corresponds to an estimator:
  % fevd, ht, shrinkage, and pre-test
  % Each row corresponds to an inner loop, over j, above
  % To plot them, e.g. k = 2; plot(rho_k{k}, sqrt(mse{k}));
  % would display the results of the experiment where k=2, so
  % rho_xz = k_rho(k) = 0.3
